capture log close
log using daly-analysis.txt, replace text

// program:	daly-analysis.do
// task:	data cleaning for DALY health state survey
// input:	daly-data
// output:	
// project: DALY and value judgments
// author:  sam harper \ 24mar2018

//  #0
//  program setup

version 14
set linesize 80
clear all
macro drop _all


// #1
// bring in data

use daly-data, clear

datasignature confirm


// #2
// Table 1 descriptives

* first need to move from long to wide format
keep wid survey question rating timecat age4 woman race4 educ3
reshape wide rating, i(wid) j(question)

label var survey "Survey version"
label define sv 0 "Version 1" 1 "Version 2", modify
label values survey sv
label var timecat "Survey time to completion"
label var age4 "Age group"
label var woman "Gender"
label define gender 0 "Male" 1 "Female", modify
label values woman gender
label var race4 "Race"
label var educ3 "Highest education completed"
label define educ3 0 "High school or less" 1 "University degree" ///
	2 "Graduate school or higher", modify
label values educ3 educ3

tabout age4 woman race4 educ3 timecat survey using daly-table1.tex, ///
	c(freq col) f(0c 1) clab(N Col_%) stats(chi2) npos(row) rep ///
	style(tex) bt font(bold)  cl1(2-9) cl2(2-3 4-5 6-7 8-9) ///
	topf(top.tex) botf(bot.tex) botstr("N=1592") ///
	topstr(15cm)
	

// #3
// Overall effect of adding information
use daly-data, clear

* Table 2 with mean rating for each question, by added information
table gbdd added, c(mean rating) format(%3.1f) row

* matrix to collect results
matrix define mrating = J(36,6,-99)
qui levelsof gbdd, local(levels)
foreach l of local levels {
  matrix mrating[`l',1] = `l'
  qui ttest rating if gbdd==`l', by(added)
  matrix mrating[`l',2] = r(mu_2)
  matrix mrating[`l',3] = r(mu_1)
  matrix mrating[`l',4] = r(mu_2) - r(mu_1)
  matrix mrating[`l',5] = (r(mu_2) - r(mu_1)) - invnorm(0.975)*r(se)
  matrix mrating[`l',6] = (r(mu_2) - r(mu_1)) + invnorm(0.975)*r(se)
  }
matrix list mrating, format(%3.1f)
label list gbdd
  


* overall effect of adding information, averaged across all questions
reg rating i.added, vce(cl wid)
estimates store crude

* with adjustment for additional demographic covariates
reg rating i.added i.age4 i.woman i.race4 i.educ3 i.timecat, vce(cl wid)
estimates store adjdem

* further adjustment for each question
reg rating i.added i.age4 i.woman i.race4 i.educ3 i.timecat i.gbdd, ///
	vce(cl wid)
estimates store adjq


* Table 3 with model results
esttab crude adjdem adjq using daly-table3.txt, b(%4.2f) ci(%4.2f) ///
	keep (1.added 1.age4 2.age4 3.age4 1.woman 1.race4 2.race4 3.race4 ///
	1.educ3 2.educ3 1.timecat 2.timecat) nostar not compress ///
	nocons nodep wide replace coeflabels(1.added ///
	"Yes vs. No" 1.age4 "25-34y vs. <25y" 2.age4 ///
	"35-44y vs. <25y" 3.age4 "45+ vs. <25y" 1.woman ///
	"Women vs. Men" 1.race4 "Indian vs. White" 2.race4 "Asian vs. White" ///
	3.race4 "Other vs. White" 1.educ3 "University vs. $\leq$ HS" 2.educ3 ///
	"Graduate+ vs. $\leq$ HS" 1.timecat "10-19 vs <10 mins" 2.timecat ///
	"20+ vs. <10 mins") mtitle("Crude" "+Demographics" "+Questions*") ///
	refcat(1.added "\textbf{Added information}" 1.age4 "\textbf{Age group}" ///
	1.woman "\textbf{Gender}" 1.race4 "\textbf{Race}" 1.educ3 ///
	"\textbf{Education}" 1.timecat "\textbf{Survey time}", nolabel) ///
	collabels($\beta$ 95\%CI) ///
	note("*Coefficients for individual questions omitted. 95\% CI (clustered by respondent) in brackets.")

	
	
// #4
// effect within each question

* first preserve the full dataset
preserve

* now estimate the crude regression for each question separately 
* and save each estimate and standard error as a dataset
statsby _b _se, by(question) clear: reg rating i.added

* now merge with data on GBD weights and categories
merge 1:1 question using daly-weights
keep if _merge==3
drop _merge

* create some variable labels
* category of additional information
label define catl 1 "Added information: psychological" ///
	2 "Added information: familial" 3 "Added information: social", modify
label values cat catl
label var cat "category of added information"

* GBD health state descriptions
encode gbddesc, gen(gbdd)
label var gbdd "GBD health states"

* GBD disability weights
label var dw "GBD 2010 disability weight"
note dw: "Extracted from IHME website on 16apr2015 by sh http://ghdx.healthdata.org/record/global-burden-disease-study-2010-gbd-2010-disability-weights"

* categorize disability weights
gen dwc=0 if dw<.25 & dw!=.
replace dwc=1 if dw>=.25 & dw<.5
replace dwc=2 if dw>.5 & dw!=.
label var dwc "Disability weight category"
label define dwcl 0 "Disability weight <0.25" 1 "Disability weight 0.25-0.49" ///
	2 "Disability weight 0.50+", modify
label values dwc dwcl


* now produce overall forest plot
metan _stat_2 _stat_5, random ///
	label(namevar=gbdd) lcols(gbdd) texts(150) ///
	effect("Diff") nobox nowt nowarning sortby(_stat_2) ///
	favours(Worse health rating # Better health rating) ///
	graphregion(fcolor(white) lcolor(white)) ///
	ciopt(lwidth(vthin)) diamopt(lcolor(black)) //////
	pointopt(msymbol(circle) mcolor(gray) msize(small) mfcolor(gray)) ///
	xlabel(-10,-5,0,5,10) xsize(6.5) ysize(8) astext(45)

graph export daly-fig1.pdf, replace


* heterogeneity tests by GBD disability weight category
metan _stat_2 _stat_5, by(dwc) nograph fixedi

metan _stat_2 _stat_5, random by(dwc) nooverall ///
	label(namevar=gbdd) lcols(gbdd) texts(150) ///
	effect("Diff") nobox nowt nowarning sortby(_stat_2) ///
	favours(Worse health rating # Better health rating) ///
	graphregion(fcolor(white) lcolor(white)) ///
	ciopt(lwidth(vthin)) diamopt(lcolor(black)) //////
	pointopt(msymbol(circle) mcolor(gray) msize(small) mfcolor(gray)) ///
	 xlabel(-10,-5,0,5,10) xsize(6.5) ysize(8) astext(45)

graph export daly-fig3.pdf, replace	 
	 
* now by type of information
metan _stat_2 _stat_5, by(cat) nograph fixedi
	 
metan _stat_2 _stat_5, random by(cat) nooverall ///
	label(namevar=gbdd) lcols(gbdd) texts(150) ///
	effect("Diff") nobox nowt nowarning sortby(_stat_2) ///
	favours(Worse health rating # Better health rating) ///
	graphregion(fcolor(white) lcolor(white)) ///
	ciopt(lwidth(vthin)) diamopt(lcolor(black)) //////
	pointopt(msymbol(circle) mcolor(gray) msize(small) mfcolor(gray)) ///
	 xlabel(-10,-5,0,5,10) xsize(6.5) ysize(8) astext(45)

graph export daly-fig4.pdf, replace
* restore full dataset
restore



//#5
// differential effect by  GBD disability weight

* as continuous
reg rating i.added##c.dw, vce(cl wid)
estimates store emmdcont
eststo dcont: margins r.added, at(dw=(0(.2)1)) post coeflegend
marginsplot, name(emmdcont, replace) scheme(sj) ///
	ytitle("Difference in health rating (with vs. without)") ///
	title("GBD disability weight (continuous)") yline(0, lpattern(dash)) 
	
coefplot dcont, vertical scheme(sj) yline(0)   ///
	coeflabels(r1vs0.added@1._at= "0.0" r1vs0.added@2._at= "0.2" ///
	r1vs0.added@3._at= "0.4" r1vs0.added@4._at= "0.6" ///
	r1vs0.added@5._at= "0.8" r1vs0.added@6._at= "1.0") ///
	title("GBD disability weight (continuous)", size(medium)) ///
	ytitle("Change in health rating (with vs. without)", height(5)) ///
	xtitle("GBD 2010 disability weight", height(5)) recast(line) lwidth(*2) ///
	ciopts(recast(rline) lpattern(dash)) name(emmdcont, replace) ///
	graphregion(fcolor(white) lcolor(white))


* categories of disability weight
reg rating i.added##i.dwc, vce(cl wid)
estimates store emmdcat

eststo dcat: margins r.added, over(dwc) post coeflegend

coefplot dcat, vertical scheme(sj) yline(0)  ///
	coeflabels(r1vs0.added@0.dwc= "<0.25" r1vs0.added@1.dwc= "0.25-0.49" ///
	r1vs0.added@2.dwc= "0.50+") ///
	title("GBD disability weight (categorical)", size(medium)) ///
	ytitle("Change in health rating (with vs. without)", height(5)) ///
	xtitle("GBD 2010 disability weight category", height(5)) ///
	name(emmdcat, replace) graphregion(fcolor(white) lcolor(white))
	
	
graph combine emmdcont emmdcat, ycommon rows(1) scheme(sj) ///
	graphregion(fcolor(white) lcolor(white))
graph export daly-fig2.pdf, replace

esttab emmdcont emmdcat using daly-table4.tex, b(%4.2f) ci(%4.2f) ///
	keep (1.added dw 1.added#c.dw 1.dwc 2.dwc 1.added#1.dwc 1.added#2.dwc) ///
	nostar not compress ///
	nocons nodep wide replace coeflabels(1.added ///
	"Added info (Yes vs. No)" dw "GBD disability weight" ///
	1.added#c.dw "Added*disability weight" 1.dwc ///
	"Disability weight 0.25-0.49 vs. <0.25" 2.dwc ///
	"Disability weight 0.50+ vs. <0.25" 1.added#1.dwc ///
	"Added*Disability weight 0.25-0.49" 1.added#2.dwc ///
	"Added*Disability weight 0.50+") mtitle("Continuous weight" ///
	"Categorical weight") ///
	collabels($\beta$ 95\%CI) ///
	note("95\% CI in brackets. Standard errors clustered by respondent.")


// #6
// differential effect by category of added information
reg rating i.added##i.dwc##i.cat, vce(cl wid)
estimates store emmccat

* Overall effect by information type
eststo m0: margins r.added, over(cat) post coeflegend
estimates store emmccat1m

* Plot for overall effect
coefplot m0, scheme(sj) drop(_cons) vertical ///
	coeflabels(r1vs0.added@1.cat= "Psychological" ///
	r1vs0.added@2.cat= "Familial" r1vs0.added@3.cat= "Social") ///
	yline(0) name(emmccat1, replace) ///
	ytitle("Change in health rating (with vs. without)", height(5)) ///
	title("By information type", size(medium)) ///
	graphregion(fcolor(white) lcolor(white)) ///
	yscale(range(-5 5)) ylabel(-4(2)4,angle(horizontal))

* Effect by information type and GBD category
estimates restore emmccat
qui eststo m1: margins r.added if dwc==0, over(cat) post
estimates restore emmccat
qui eststo m2: margins r.added if dwc==1, over(cat) post
estimates restore emmccat
qui eststo m3: margins r.added if dwc==2, over(cat) post

* Plot for effect by information type and GBD category
coefplot (m1, msymbol(D) offset(-.1)) (m2, msymbol(O)) (m3, msymbol(T) offset(.1)), scheme(sj) ///
	drop(_cons) vertical coeflabels(r1vs0.added@1.cat= "Psychological" ///
	r1vs0.added@2.cat= "Familial" r1vs0.added@3.cat= "Social") ///
	yline(0) legend(ring(0) position(2) cols(1) ///
	lab(2 "<0.25") lab(4 "0.25-0.49") lab(6 "0.50+") ///
	title("GBD weight", size(medsmall))) name(emmccat2, replace) ///
	ytitle("Change in health rating (with vs. without)", height(5)) ///
	title("By GBD weight and information type", size(medium)) ///
	graphregion(fcolor(white) lcolor(white)) ///
	yscale(range(-5 5)) ylabel(-4(2)4,angle(horizontal)) 

* Combine plots
graph combine emmccat1 emmccat2, ycommon rows(1) scheme(sj) ///
	graphregion(fcolor(white) lcolor(white))
graph export daly-fig4.pdf, replace

* Restore estimates and get marginal effects for table
estimates restore emmccat
margins r.added, over(cat dwc) post coeflegend
estimates store emmccat2m

* Table of estimates
esttab emmccat1m emmccat2m using daly-table5.tex, b(%4.2f) ci(%4.2f) ///
	keep(r1vs0.added@1.cat r1vs0.added@2.cat r1vs0.added@3.cat ///
	r1vs0.added@1.cat#0.dwc r1vs0.added@1.cat#1.dwc r1vs0.added@1.cat#2.dwc /// 
	r1vs0.added@2.cat#0.dwc r1vs0.added@2.cat#1.dwc r1vs0.added@2.cat#2.dwc /// 
	r1vs0.added@3.cat#0.dwc r1vs0.added@3.cat#1.dwc r1vs0.added@3.cat#2.dwc) /// 
	nostar not compress ///
	nocons nodep wide replace coeflabels( ///
	r1vs0.added@1.cat	"Added effect: psychological" ///
	r1vs0.added@2.cat	"Added effect: familial" ///
	r1vs0.added@3.cat	"Added effect: social" ///
	r1vs0.added@1.cat#0.dwc "Added effect: psychological, GBD<0.25" ///
	r1vs0.added@1.cat#1.dwc "Added effect: psychological, GBD 0.25-0.49" ///
	r1vs0.added@1.cat#2.dwc "Added effect: psychological, GBD>0.50" ///
	r1vs0.added@2.cat#0.dwc "Added effect: familial, GBD<0.25" ///
	r1vs0.added@2.cat#1.dwc "Added effect: familial, GBD 0.25-0.49" ///
	r1vs0.added@2.cat#2.dwc "Added effect: familial, GBD>0.50" ///
	r1vs0.added@3.cat#0.dwc "Added effect: social, GBD<0.25" ///
	r1vs0.added@3.cat#1.dwc "Added effect: social, GBD 0.25-0.49" ///
	r1vs0.added@3.cat#2.dwc "Added effect: social, GBD>0.50") ///
	mtitle("Overall by category" "Category and GBD weight") ///
	collabels($\beta$ 95\%CI) ///
	note("95\% CI in brackets. Standard errors clustered by respondent.")


// #7
// effect of adding familial information by gender
use daly-data, clear
datasignature confirm

label define gender 0 "Male" 1 "Female", modify
label values woman gender

reg rating i.added##i.cat##i.woman, vce(cl wid)
testparm added#cat#woman
local gp = round(r(p), 0.001)
estimates store emmg1
	
* marginal effects by category and gender
margins r.added, over(cat woman)
marginsplot, xdim(woman) scheme(sj) yline(0, lpattern(dash)) ylabel(-4(2)4)  ///
	ytitle("Difference in health rating (with vs. without)") /// 
	xtitle("Gender") ///
	title("Overall effect by type of information and gender") ///
	legend(ring(0) position(12) rows(1) title("Type of information added", ///
	size(medsmall))) ///
	note("P-value for differential effect by gender within information type=0`gp'")

graph export daly-afig1.pdf, replace

esttab emmg1 using daly-table6.tex, b(%4.2f) ci(%4.2f) ///
	keep (1.added 1.woman 1.added#1.woman) nostar not compress ///
	nocons nodep wide replace coeflabels(1.added ///
	"Added info (Yes vs. No)" 1.woman "Women vs. men" ///
	1.added#1.woman "Added info X women vs. men") ///
	mtitle("Mean rating") ///
	collabels($\beta$ 95\%CI) ///
	note("95\% CI in brackets. Standard errors clustered by respondent.")

graph export daly-fig5.pdf, replace
graph combine emmdcont emmdcat, ycommon rows(1) scheme(sj)



// #8
// differences by education
label var educ3 "Highest education completed"
label define educ3 0 "High school or less" 1 "University degree" ///
	2 "Graduate school or higher", modify
label values educ3 educ3

reg rating i.added##i.cat##i.educ3, vce(cl wid)
testparm added#cat#educ3
local ep = round(r(p), 0.01)
margins r.added, over(cat educ3)
marginsplot, xdim(educ3) scheme(sj) yline(0, lpattern(dash)) ylabel(-4(2)4)  ///
	ytitle("Difference in health rating (with vs. without)") /// 
	title("Overall effect by type of information and education") ///
	legend(ring(0) position(12) rows(1) title("Type of information added", ///
	size(medsmall))) ///
	note("P-value for differential effect by education within information type=0`ep'")

graph export daly-afig2.pdf, replace

eststo emmeduc: margins r.added, over(educ3) post coeflegend
esttab emmeduc using daly-table7.tex, b(%4.2f) ci(%4.2f) ///
	keep(r1vs0.added@0.educ3 r1vs0.added@1.educ3 r1vs0.added@2.educ3) ///
	nostar not nonum compress ///
	nocons nodep wide replace coeflabels(r1vs0.added@0.educ3 ///
	"<=High school" r1vs0.added@1.educ3 "Univ degree" r1vs0.added@2.educ3 ///
	"Graduate +") mtitle("Mean rating") ///
	collabels($\beta$ 95\%CI) ///
	note("95\% CI in brackets. Standard errors clustered by respondent.")
	

	
// #9
// differences by age	
label var age4 "Age group"

reg rating i.added##i.cat##i.age4, vce(cl wid)
testparm added#cat#age4
local ap = round(r(p), 0.001)
margins r.added, over(cat age4)
marginsplot, xdim(age4) scheme(sj) yline(0, lpattern(dash)) ylabel(-4(2)4)  ///
	ytitle("Difference in health rating (with vs. without)") /// 
	title("Overall effect by type of information and age") ///
	legend(ring(0) position(12) rows(1) title("Type of information added", ///
	size(medsmall))) ///
	note("P-value for differential effect by age within information type=0`ap'")

graph export daly-afig3.pdf, replace
	
log close
exit


	
	




